library(tidyverse) # for data cleaning and plotting
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate) # for date manipulation
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(openintro) # for the abbr2state() function
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(maps) # for map data
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(ggmap) # for mapping points on maps
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(gplots) # for col2hex() function
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(RColorBrewer) # for color palettes
library(sf) # for working with spatial data
## Linking to GEOS 3.9.1, GDAL 3.4.0, PROJ 8.1.1; sf_use_s2() is TRUE
library(leaflet) # for highly customizable mapping
library(ggthemes)
library(plotly) # for the ggplotly() - basic interactivity
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gganimate) # for adding animation layers to ggplots
library(gifski) # for creating the gif (don't need to load this library every time,but need it installed)
library(transformr) # for "tweening" (gganimate)
##
## Attaching package: 'transformr'
## The following object is masked from 'package:sf':
##
## st_normalize
library(shiny) # for creating interactive apps
library(patchwork) # for nicely combining ggplot2 graphs
library(gt) # for creating nice tables
##
## Attaching package: 'gt'
## The following object is masked from 'package:openintro':
##
## sp500
library(countrycode)
theme_set(theme_minimal())
library(geofacet)
library(tidytext)
olympics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-07-27/olympics.csv')
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, sex, team, noc, games, season, city, sport, event, medal
## dbl (5): id, age, height, weight, year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
world <- map_data("world")
summer_gold <- olympics %>%
filter(season == "Summer") %>%
drop_na(medal) %>%
filter(medal == "Gold") %>%
mutate(countryName = countrycode(noc, "genc3c", "country.name")) %>%
mutate(countryName = ifelse(countryName == "USA", "United States", countryName)) %>%
mutate(countryName = ifelse(noc == "ALG", "Algeria", countryName)) %>%
mutate(countryName = ifelse(noc == "BAH", "Bahamas", countryName)) %>%
mutate(countryName = ifelse(noc == "BUL", "Bulgaria", countryName)) %>%
mutate(countryName = ifelse(noc == "CHI", "Chile", countryName)) %>%
mutate(countryName = ifelse(noc == "CRC", "Costa Rica", countryName)) %>%
mutate(countryName = ifelse(noc == "CRO", "Croatia", countryName)) %>%
mutate(countryName = ifelse(noc == "DEN", "Denmark", countryName)) %>%
mutate(countryName = ifelse(noc == "FIJ", "Fiji", countryName)) %>%
mutate(countryName = ifelse(noc == "GER", "Germany", countryName)) %>%
mutate(countryName = ifelse(noc == "GRE", "Greece", countryName)) %>%
mutate(countryName = ifelse(noc == "GRN", "Grenada", countryName)) %>%
mutate(countryName = ifelse(noc == "HAI", "Haiti", countryName)) %>%
mutate(countryName = ifelse(noc == "INA", "Indonesia", countryName)) %>%
mutate(countryName = ifelse(noc == "IRI", "Iran", countryName)) %>%
mutate(countryName = ifelse(noc == "LAT", "Latvia", countryName)) %>%
mutate(countryName = ifelse(noc == "MGL", "Mongolia", countryName)) %>%
mutate(countryName = ifelse(noc == "NED", "Netherlands", countryName)) %>%
mutate(countryName = ifelse(noc == "NGR", "Nigeria", countryName)) %>%
mutate(countryName = ifelse(noc == "POR", "Portugal", countryName)) %>%
mutate(countryName = ifelse(noc == "PUR", "Puerto Rico", countryName)) %>%
mutate(countryName = ifelse(noc == "RSA", "South Africa", countryName)) %>%
mutate(countryName = ifelse(noc == "SLO", "Slovenia", countryName)) %>%
mutate(countryName = ifelse(noc == "SUI", "Switzerland", countryName)) %>%
mutate(countryName = ifelse(noc == "UAE", "United Arab Emirates", countryName)) %>%
mutate(countryName = ifelse(noc == "URU", "Uruguay", countryName)) %>%
mutate(countryName = ifelse(noc == "VIE", "Vietnam", countryName)) %>%
mutate(countryName = ifelse(noc == "ZIM", "Zimbabwe", countryName)) %>%
drop_na(countryName)
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: ALG, ANZ, BAH, BUL, CHI, CRC, CRO, DEN, EUN, FIJ, FRG, GDR, GER, GRE, GRN, HAI, INA, IOA, IRI, KOS, LAT, MGL, NED, NGR, POR, PUR, RSA, SCG, SLO, SUI, TCH, TPE, UAE, URS, URU, VIE, YUG, ZIM
#taking time to find the codes that weren't matched and double-checking to see if the countries exist --> add to data set
# Footnote or mention in part of the project the choice to leave out territories not fully recognized, such as Kosovo and Chinese Taipei, and countries that no longer exist, including the Soviet Union and Yugoslavia.
summer_gold
## # A tibble: 9,806 × 16
## id name sex age height weight team noc games year season city
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 4 Edgar … M 34 NA NA Denma… DEN 1900… 1900 Summer Paris
## 2 17 Paavo … M 28 175 64 Finla… FIN 1948… 1948 Summer Lond…
## 3 17 Paavo … M 28 175 64 Finla… FIN 1948… 1948 Summer Lond…
## 4 17 Paavo … M 28 175 64 Finla… FIN 1948… 1948 Summer Lond…
## 5 21 Ragnhi… F 27 163 NA Norway NOR 2008… 2008 Summer Beij…
## 6 42 Thomas… M 25 NA NA Taifun NOR 1912… 1912 Summer Stoc…
## 7 56 Ren Ab… M 21 NA NA France FRA 1956… 1956 Summer Melb…
## 8 72 Alekse… M 28 180 83 Belar… BLR 2008… 2008 Summer Beij…
## 9 73 Luc Ab… M 23 182 86 France FRA 2008… 2008 Summer Beij…
## 10 73 Luc Ab… M 27 182 86 France FRA 2012… 2012 Summer Lond…
## # … with 9,796 more rows, and 4 more variables: sport <chr>, event <chr>,
## # medal <chr>, countryName <chr>
countriesone<-summer_gold %>%
group_by(countryName) %>%
count(medal) %>%
arrange(desc(n)) #results in a medal count in descending order
Separate and compare each event and which countries earn the most gold - bar graph of all the countries that win gold - color by event
summer_gold %>%
group_by(countryName) %>%
count() %>%
ggplot(aes(x=n, y=reorder(countryName,n)))+
geom_col()+
labs(title="Number of gold medals earned per country")
summer_gold%>%
group_by(countryName, sport) %>%
count() %>%
filter(n>20) %>%
ungroup %>%
mutate(sport2=reorder_within(sport,n,countryName)) %>%
ggplot(aes(x=n, y=sport2, fill=sport))+
geom_col()+
facet_wrap(vars(fct_reorder(countryName,n,sum,.desc = TRUE)),scales = "free_y")+
scale_y_reordered()+
labs(title="Countries that have won more than 20 gold medals")
Line graph
summer_all <- olympics %>%
filter(season == "Summer") %>%
mutate(countryName = countrycode(noc, "genc3c", "country.name")) %>%
mutate(countryName = ifelse(countryName == "United States", "USA", countryName)) %>%
mutate(countryName = ifelse(noc == "ALG", "Algeria", countryName)) %>%
mutate(countryName = ifelse(noc == "BAH", "Bahamas", countryName)) %>%
mutate(countryName = ifelse(noc == "BUL", "Bulgaria", countryName)) %>%
mutate(countryName = ifelse(noc == "CHI", "Chile", countryName)) %>%
mutate(countryName = ifelse(noc == "CRC", "Costa Rica", countryName)) %>%
mutate(countryName = ifelse(noc == "CRO", "Croatia", countryName)) %>%
mutate(countryName = ifelse(noc == "DEN", "Denmark", countryName)) %>%
mutate(countryName = ifelse(noc == "FIJ", "Fiji", countryName)) %>%
mutate(countryName = ifelse(noc == "GER", "Germany", countryName)) %>%
mutate(countryName = ifelse(noc == "GRE", "Greece", countryName)) %>%
mutate(countryName = ifelse(noc == "GRN", "Grenada", countryName)) %>%
mutate(countryName = ifelse(noc == "HAI", "Haiti", countryName)) %>%
mutate(countryName = ifelse(noc == "INA", "Indonesia", countryName)) %>%
mutate(countryName = ifelse(noc == "IRI", "Iran", countryName)) %>%
mutate(countryName = ifelse(noc == "LAT", "Latvia", countryName)) %>%
mutate(countryName = ifelse(noc == "MGL", "Mongolia", countryName)) %>%
mutate(countryName = ifelse(noc == "NED", "Netherlands", countryName)) %>%
mutate(countryName = ifelse(noc == "NGR", "Nigeria", countryName)) %>%
mutate(countryName = ifelse(noc == "POR", "Portugal", countryName)) %>%
mutate(countryName = ifelse(noc == "PUR", "Puerto Rico", countryName)) %>%
mutate(countryName = ifelse(noc == "RSA", "South Africa", countryName)) %>%
mutate(countryName = ifelse(noc == "SLO", "Slovenia", countryName)) %>%
mutate(countryName = ifelse(noc == "SUI", "Switzerland", countryName)) %>%
mutate(countryName = ifelse(noc == "UAE", "United Arab Emirates", countryName)) %>%
mutate(countryName = ifelse(noc == "URU", "Uruguay", countryName)) %>%
mutate(countryName = ifelse(noc == "VIE", "Vietnam", countryName)) %>%
mutate(countryName = ifelse(noc == "ZIM", "Zimbabwe", countryName)) %>%
drop_na(countryName)
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: AHO, ALG, ANG, ANT, ANZ, ARU, ASA, BAH, BAN, BAR, BER, BHU, BIZ, BOH, BOT, BRU, BUL, BUR, CAM, CAY, CGO, CHA, CHI, CRC, CRO, CRT, DEN, ESA, EUN, FIJ, FRG, GAM, GBS, GDR, GEQ, GER, GRE, GRN, GUA, GUI, HAI, HON, INA, IOA, IRI, ISV, IVB, KOS, KSA, KUW, LAT, LBA, LES, LIB, MAD, MAL, MAS, MAW, MGL, MON, MRI, MTN, MYA, NBO, NCA, NED, NEP, NFL, NGR, NIG, OMA, PAR, PHI, PLE, POR, PUR, RHO, ROT, RSA, SAA, SAM, SCG, SEY, SKN, SLO, SOL, SRI, SUD, SUI, TAN, TCH, TGA, TOG, TPE, UAE, UAR, UNK, URS, URU, VAN, VIE, VIN, WIF, YAR, YMD, YUG, ZAM, ZIM
prop_of_medals<-summer_all %>%
drop_na(medal) %>%
filter(countryName %in% c("USA", "United Kingdom", "Germany", "France", "Italy", "Australia", "Hungary", "Sweden", "Netherlands", "China")) %>%
count(countryName, medal) %>%
group_by(countryName) %>%
mutate(proportion=n/sum(n))
graph1<-prop_of_medals %>%
ggplot(aes(y=countryName,x=proportion,fill=fct_relevel(medal,c("Bronze","Silver"))))+
scale_fill_manual(values=c("Bronze"="#a17419",Silver = "#b7b7b7", Gold = "#d5a500"))+
geom_col()+
labs(title="Distribution of medals won by the top 10 countries",y="",x="Proportion", fill=NULL)
graph2<-prop_of_medals %>%
ggplot(aes(y=fct_reorder(countryName,n,sum),x=n,fill=fct_relevel(medal,c("Bronze","Silver"))))+
scale_fill_manual(values=c("Bronze"="#a17419",Silver = "#b7b7b7", Gold = "#d5a500"))+
geom_col()+
theme(legend.title = element_blank())+
labs(title = "Number of medals won by the top 10 countries", y="", x="Count", fill=NULL)
ggplotly(graph1)
graph2